First things first: before starting the project, we need to load the packages that the project requires.
library(tidyverse) #data preprocess
library(ggplot2) #data visualization
library(caret) #predictive model

Langkah selanjutnya adalah memasukkan data ke dalam environment project, lalu melihat tipe class data yang ada pada data.
# Read both worksheets of the Excel workbook: sheet 1 goes into `df`,
# sheet 2 goes into `map`. The first row of each sheet is used as the
# column names.
df <- readxl::read_xlsx(
  path = "dataset/data_sulni.xlsx",
  sheet = 1,
  col_names = TRUE
)
map <- readxl::read_xlsx(
  path = "dataset/data_sulni.xlsx",
  sheet = 2,
  col_names = TRUE
)
## Classes 'tbl_df', 'tbl' and 'data.frame': 131 obs. of 15 variables:
## $ date : POSIXct, format: "2008-01-01" "2008-02-01" ...
## $ p_debt_agriculture : num 3359 3353 3427 3424 3426 ...
## $ p_debt_mining : num 6241 6472 6421 6588 6817 ...
## $ p_debt_manufacturing: num 18766 18558 19043 19188 19506 ...
## $ p_debt_electricity : num 7716 7729 7358 7005 7141 ...
## $ p_debt_usd : num 52459 53202 53908 54730 54525 ...
## $ g_debt_agriculture : num 1615 1611 1680 1638 1625 ...
## $ g_debt_mining : num 701 699 732 717 708 ...
## $ g_debt_manufacturing: num 2614 2624 2707 2588 2555 ...
## $ g_debt_electricity : num 6412 6414 6528 6371 6283 ...
## $ g_debt_usd : num 26843 26741 29032 28938 28753 ...
## $ reserve_position : num 55999 57125 58987 58770 57464 ...
## $ us_prime_rates : num 6 6 5.25 5 5 5 5 5 5 4 ...
## $ usd_idr : num 9246 9065 9215 9222 9315 ...
## $ trend : num 1 0 1 1 1 0 0 1 1 1 ...
## date p_debt_agriculture p_debt_mining
## Min. :2008-01-01 00:00:00 Min. :3353 Min. : 6241
## 1st Qu.:2010-09-16 00:00:00 1st Qu.:4605 1st Qu.:11459
## Median :2013-06-01 00:00:00 Median :5901 Median :22882
## Mean :2013-06-01 03:28:51 Mean :6053 Mean :19044
## 3rd Qu.:2016-02-15 12:00:00 3rd Qu.:7398 3rd Qu.:25277
## Max. :2018-11-01 00:00:00 Max. :9378 Max. :28898
## p_debt_manufacturing p_debt_electricity p_debt_usd g_debt_agriculture
## Min. :18558 Min. : 5378 Min. : 52459 Min. :1050
## 1st Qu.:19901 1st Qu.:12517 1st Qu.: 70446 1st Qu.:1309
## Median :27370 Median :16966 Median :119102 Median :1558
## Mean :27413 Mean :16304 Mean :112362 Mean :1546
## 3rd Qu.:34059 3rd Qu.:19657 3rd Qu.:149778 3rd Qu.:1778
## Max. :37268 Max. :29843 Max. :169622 Max. :2197
## g_debt_mining g_debt_manufacturing g_debt_electricity g_debt_usd
## Min. :399.5 Min. : 415.0 Min. :3295 Min. :26741
## 1st Qu.:455.8 1st Qu.: 769.6 1st Qu.:3567 1st Qu.:37060
## Median :632.4 Median :1290.3 Median :4476 Median :49782
## Mean :630.8 Mean :1476.7 Mean :4757 Mean :53042
## 3rd Qu.:788.9 3rd Qu.:2209.1 3rd Qu.:5762 3rd Qu.:69094
## Max. :869.4 Max. :2706.5 Max. :6838 Max. :86250
## reserve_position us_prime_rates usd_idr trend
## Min. : 50182 Min. :3.250 Min. : 8500 Min. :0.0000
## 1st Qu.: 89175 1st Qu.:3.250 1st Qu.: 9281 1st Qu.:0.0000
## Median :107543 Median :3.250 Median :11360 Median :1.0000
## Mean : 99011 Mean :3.832 Mean :11264 Mean :0.5649
## 3rd Qu.:114376 3rd Qu.:4.375 3rd Qu.:13325 3rd Qu.:1.0000
## Max. :131980 Max. :6.000 Max. :15202 Max. :1.0000
## Classes 'tbl_df', 'tbl' and 'data.frame': 5124 obs. of 9 variables:
## $ category_1: chr "I. Barang dagangan umum" "I. Barang dagangan umum" "I. Barang dagangan umum" "I. Barang dagangan umum" ...
## $ category_2: chr "A. Hasil pertanian" "A. Hasil pertanian" "A. Hasil pertanian" "A. Hasil pertanian" ...
## $ category_3: chr "Biji coklat" "Udang" "Biji kopi" "Ikan dan lain-lain" ...
## $ month : num 1 1 1 1 1 1 1 1 1 1 ...
## $ year : num 2013 2013 2013 2013 2013 ...
## $ export_usd: chr "32636.12" "84387.33" "78193.47" "78708.149999999994" ...
## $ import_usd: chr "6323.33" "2102.91" "1444.29" "13489.63" ...
## $ date : chr "1/1/2013" "1/1/2013" "1/1/2013" "1/1/2013" ...
## $ count : num 1 2 3 4 5 6 7 8 9 10 ...
## category_1 category_2 category_3 month
## Length:5124 Length:5124 Length:5124 Min. : 1.00
## Class :character Class :character Class :character 1st Qu.: 3.75
## Mode :character Mode :character Mode :character Median : 6.50
## Mean : 6.50
## 3rd Qu.: 9.25
## Max. :12.00
## year export_usd import_usd date
## Min. :2013 Length:5124 Length:5124 Length:5124
## 1st Qu.:2014 Class :character Class :character Class :character
## Median :2016 Mode :character Mode :character Mode :character
## Mean :2016
## 3rd Qu.:2018
## Max. :2019
## count
## Min. : 1
## 1st Qu.:1282
## Median :2562
## Mean :2562
## 3rd Qu.:3843
## Max. :5124
# Convert the columns of `map` that were read as character into proper types.
# `dmy()` is namespace-qualified so that it resolves even when lubridate is
# not attached (library(tidyverse) only attaches it from tidyverse 2.0.0 on).
map$date <- lubridate::dmy(map$date) # day/month/year strings -> Date
# Entries that cannot be parsed as numbers become NA (with a warning).
map$export_usd <- as.numeric(map$export_usd)
map$import_usd <- as.numeric(map$import_usd)
summary(map)
## category_1 category_2 category_3 month
## Length:5124 Length:5124 Length:5124 Min. : 1.00
## Class :character Class :character Class :character 1st Qu.: 3.75
## Mode :character Mode :character Mode :character Median : 6.50
## Mean : 6.50
## 3rd Qu.: 9.25
## Max. :12.00
##
## year export_usd import_usd date
## Min. :2013 Min. : 0 Min. : 0 Min. :2013-01-01
## 1st Qu.:2014 1st Qu.: 15852 1st Qu.: 3414 1st Qu.:2014-09-23
## Median :2016 Median : 53945 Median : 35964 Median :2016-06-16
## Mean :2016 Mean : 206409 Mean : 191644 Mean :2016-06-16
## 3rd Qu.:2018 3rd Qu.: 220041 3rd Qu.: 141000 3rd Qu.:2018-03-08
## Max. :2019 Max. :2268562 Max. :3253121 Max. :2019-12-01
## NA's :127 NA's :148
## count
## Min. : 1
## 1st Qu.:1282
## Median :2562
## Mean :2562
## 3rd Qu.:3843
## Max. :5124
##
## [1] 127
## [1] 148
## category_1 category_2 category_3 month
## Length:5124 Length:5124 Length:5124 Min. : 1.00
## Class :character Class :character Class :character 1st Qu.: 3.75
## Mode :character Mode :character Mode :character Median : 6.50
## Mean : 6.50
## 3rd Qu.: 9.25
## Max. :12.00
## year export_usd import_usd date
## Min. :2013 Min. : 0 Min. : 0 Min. :2013-01-01
## 1st Qu.:2014 1st Qu.: 13842 1st Qu.: 2805 1st Qu.:2014-09-23
## Median :2016 Median : 51654 Median : 34283 Median :2016-06-16
## Mean :2016 Mean : 201293 Mean : 186109 Mean :2016-06-16
## 3rd Qu.:2018 3rd Qu.: 212219 3rd Qu.: 135138 3rd Qu.:2018-03-08
## Max. :2019 Max. :2268562 Max. :3253121 Max. :2019-12-01
## count
## Min. : 1
## 1st Qu.:1282
## Median :2562
## Mean :2562
## 3rd Qu.:3843
## Max. :5124
# Correlation plot of `df`. It is built while `us_prime_rates` is still
# numeric, i.e. before the factor conversion below. `ggcorr()` comes from
# GGally, which is not attached by this script, so it is namespace-qualified.
p1 <- GGally::ggcorr(df, label = TRUE, label_size = 2.9)

# Turn the prime rate into a factor so that it maps to a discrete colour scale.
df$us_prime_rates <- as.factor(df$us_prime_rates)

# Exchange rate over time, one colour per prime-rate level.
p2 <- ggplot(df, aes(x = date, y = usd_idr, color = us_prime_rates)) +
  geom_jitter()

# Three series on a shared time axis. The colour is mapped inside aes() so
# that a legend is drawn and the legend title set by labs(colour = ...)
# takes effect; scale_colour_manual() pins each series to its colour.
p3 <- ggplot(df, aes(x = date)) +
  geom_line(aes(y = usd_idr, colour = "usd_idr")) +
  geom_line(aes(y = g_debt_electricity, colour = "g_debt_electricity")) +
  geom_line(aes(y = p_debt_electricity, colour = "p_debt_electricity")) +
  scale_colour_manual(
    values = c(
      usd_idr = "red",
      g_debt_electricity = "blue",
      p_debt_electricity = "green"
    )
  ) +
  xlab("Dates") +
  ylab("value") +
  labs(colour = "debt type")

# One subset of `map` per value of `category_2`.
map1 <- map %>%
  filter(category_2 == "A. Hasil pertanian")
map2 <- map %>%
  filter(category_2 == "B. Hasil industri")
map3 <- map %>%
  filter(category_2 == "C. Hasil pertambangan")
# Scatter plot of export vs. import (both divided by 1000) for every row of
# `map`, coloured by `category_3` and split into one panel per `category_2`.
# The legend is hidden on the points layer.
p_1 <- ggplot(
  map,
  aes(
    x = export_usd / 1000,
    y = import_usd / 1000,
    colour = factor(category_3)
  )
) +
  geom_point(show.legend = FALSE, alpha = 0.7) +
  scale_color_viridis_d() +
  labs(x = "export_usd", y = "import_usd", colour = "category_3") +
  facet_wrap(~category_2)

# Animate over the `date` column; the title shows the date of the current
# frame. `transition_time()` comes from gganimate, which is not attached by
# this script, so it is namespace-qualified.
animation1 <- p_1 +
  gganimate::transition_time(date) +
  labs(title = "Year: {frame_time}")

# Same scatter plot restricted to `map1`, with the legend shown below the
# plot instead of being hidden.
p_2 <- ggplot(
  map1,
  aes(
    x = export_usd / 1000,
    y = import_usd / 1000,
    colour = factor(category_3)
  )
) +
  geom_point(show.legend = TRUE, alpha = 0.7) +
  scale_color_viridis_d() +
  labs(x = "export_usd", y = "import_usd", colour = " ") +
  theme(legend.position = "bottom")

animation2 <- p_2 +
  gganimate::transition_time(date) +
  labs(title = "Year: {frame_time}")
# Render the faceted animation at 5 frames per second.
gganimate::animate(animation1, fps = 5)

# Fix the RNG seed before training so the resampling is reproducible.
set.seed(50)
# 10-fold cross-validation.
train.control <- caret::trainControl(method = "cv", number = 10)
# The outcome is converted to a factor before it is passed to train().
df$trend <- as.factor(df$trend)
# Train the model
model <- caret::train(
  trend ~ p_debt_mining + p_debt_manufacturing + g_debt_mining +
    g_debt_manufacturing + g_debt_electricity + g_debt_usd +
    reserve_position + us_prime_rates,
  data = df,
  method = "vglmAdjCat",
  trControl = train.control
)
# Summarize the results
print(model)
## Adjacent Categories Probability Model for Ordinal Data
##
## 131 samples
## 8 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 117, 118, 118, 117, 117, 119, ...
## Resampling results across tuning parameters:
##
## parallel Accuracy Kappa
## FALSE 0.6418396 0.2739485
## TRUE 0.6418396 0.2739485
##
## Tuning parameter 'link' was held constant at a value of loge
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were parallel = FALSE and link = loge.